/* Copyright (c) 2022-2022, LiWangQian<liwangqian@huawei.com> All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this list of
 *    conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
 *    of conditions and the following disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors may be used
 *    to endorse or promote products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "lexer.h"
#include "../utils/lexer_utils.h"
#include <stdlib.h>
#include <stdbool.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Cursor helpers shared by the scanners below. Every macro takes a pointer to
 * the lexer; none of them performs bounds checking, so callers are expected
 * to test LEXER_ISEOF (or otherwise know the offset is valid) first.
 * All macro parameters are parenthesized so that arbitrary expressions can be
 * passed safely (e.g. LEXER_S(lexer, a + b)).
 */

/* Look ahead: the input character `i` positions after the cursor. */
#define LEXER_LA(lexer, i) \
    ((lexer)->input[(lexer)->index + (i)])

/* Look behind: the input character `i` positions before the cursor. */
#define LEXER_LB(lexer, i) \
    ((lexer)->input[(lexer)->index - (i)])

/* Advance the cursor by `i` characters. */
#define LEXER_EAT(lexer, i) \
    ((lexer)->index += (i))

/* Pointer to the input character at absolute offset `start`. */
#define LEXER_P(lexer, start) \
    ((lexer)->input + (start))

/* Number of characters between absolute offset `start` and the cursor. */
#define LEXER_S(lexer, start) \
    ((lexer)->index - (start))

/*
 * Record that a line terminator has just been consumed: bump the line counter
 * and remember the cursor offset as the start of the new line.
 */
#define LEXER_NEWLINE(lexer)                        \
    do {                                            \
        (lexer)->source.line++;                     \
        (lexer)->source.character = (lexer)->index; \
    } while (0)

/* True once the cursor has reached (or passed) the end of the input. */
#define LEXER_ISEOF(lexer) \
    ((lexer)->index >= (lexer)->size)

/*
 * Store error `code_` in the lexer together with the current position: the
 * line number, the column (cursor offset relative to the start of the current
 * line) and the absolute cursor offset.
 */
#define LEXER_ERROR(lexer, code_)                          \
    do {                                                   \
        (lexer)->error.code = (code_);                     \
        (lexer)->error.source.line = (lexer)->source.line; \
        (lexer)->error.source.column =                     \
            (lexer)->index - (lexer)->source.character;    \
        (lexer)->error.source.character = (lexer)->index;  \
    } while (0)

/*
 * Spellings of the boolean literals as string references. The length is
 * derived from the literal itself (excluding the terminating NUL) so the two
 * fields cannot drift apart.
 * NOTE(review): neither constant is referenced by the code visible in this
 * file - confirm whether they are still needed.
 */
static cfgm_string_ref_t boolean_true = { "true", sizeof("true") - 1 };
static cfgm_string_ref_t boolean_false = { "false", sizeof("false") - 1 };

/*
 * Consume one line terminator at the cursor, if there is one.
 *
 * A "\r\n" or "\n\r" pair is consumed as a single terminator. After consuming
 * it, the lexer's line counter and line-start offset are updated.
 *
 * Returns true when a terminator was consumed, false when the cursor is at
 * the end of the input or does not point at a line terminator.
 */
bool cfgm_lexer_skip_eol(cfgm_cml_lexer_t *lexer)
{
    if (LEXER_ISEOF(lexer)) {
        return false;
    }

    char c = LEXER_LA(lexer, 0);
    if (!cfgm_lexer_is_eol(c)) {
        return false;
    }

    /* Only peek at the following character when it lies inside the input. */
    if (lexer->size - lexer->index > 1) {
        char n = LEXER_LA(lexer, 1);
        if ((c == '\r' && n == '\n') || (c == '\n' && n == '\r')) {
            LEXER_EAT(lexer, 1);
        }
    }

    LEXER_EAT(lexer, 1);
    LEXER_NEWLINE(lexer);
    return true;
}

/*
 * Advance the cursor past any run of whitespace characters and line
 * terminators. Stops at the first character that is neither, or at the end
 * of the input.
 */
void cfgm_lexer_skip_space(cfgm_cml_lexer_t *lexer)
{
    for (;;) {
        if (LEXER_ISEOF(lexer)) {
            return;
        }
        if (cfgm_lexer_is_whitespace(LEXER_LA(lexer, 0))) {
            LEXER_EAT(lexer, 1);
            continue;
        }
        /* Not plain whitespace: keep going only if it was a line terminator. */
        if (cfgm_lexer_skip_eol(lexer)) {
            continue;
        }
        return;
    }
}

/*
 * Skip a '#' comment that starts at the cursor.
 *
 * Does nothing when the cursor is at the end of the input or does not point
 * at '#'. Otherwise every character up to (but not including) the next line
 * terminator is consumed, followed by the terminator itself when present.
 * A comment that runs to the end of the input without a terminator is
 * consumed completely.
 */
void cfgm_lexer_skip_comment(cfgm_cml_lexer_t *lexer)
{
    if (LEXER_ISEOF(lexer) || LEXER_LA(lexer, 0) != '#') {
        return;
    }

    /*
     * Test the character AT the cursor so that the cursor ends up on the line
     * terminator rather than on the last character of the comment.
     */
    while (!LEXER_ISEOF(lexer) && !cfgm_lexer_is_eol(LEXER_LA(lexer, 0))) {
        LEXER_EAT(lexer, 1);
    }

    if (!LEXER_ISEOF(lexer)) {
        (void)cfgm_lexer_skip_eol(lexer);
    }
}

/*
 * Scan an identifier starting at the cursor and return it as a TK_ID token.
 *
 * The first character is consumed unconditionally, so the caller must already
 * have checked that it can start a word. Subsequent word-part characters are
 * consumed until a non-word character or the end of the input is reached.
 * The token's value references the input buffer directly (no copy is made
 * here). Returns whatever the token factory returns.
 */
cfgm_token_t *cfgm_lexer_scan_ident(cfgm_cml_lexer_t *lexer)
{
    unsigned int start = lexer->index;
    LEXER_EAT(lexer, 1);
    /* Stop at the end of the input instead of reading past the buffer. */
    while (!LEXER_ISEOF(lexer) &&
           cfgm_lexer_is_word_part(LEXER_LA(lexer, 0))) {
        LEXER_EAT(lexer, 1);
    }

    cfgm_string_ref_t value = {
        .buf = LEXER_P(lexer, start),
        .size = LEXER_S(lexer, start)
    };

    return cfgm_token_factory_new_token(
        lexer->factory, TK_ID, &value, &lexer->source);
}

/*
 * Scan a quoted string literal and return it as a TK_STRING token.
 *
 * The character at the cursor is taken as the quote character; the literal
 * ends at the next occurrence of that same character. The token's value
 * references the text between the quotes (quotes excluded) inside the input
 * buffer. No escape sequences are interpreted.
 *
 * If a line terminator or the end of the input is reached before the closing
 * quote, CFGM_EUNFINISHED_STRING is recorded in the lexer and NULL is
 * returned. This includes the case where the opening quote is the very last
 * character of the input.
 */
cfgm_token_t *cfgm_lexer_scan_string_literal(cfgm_cml_lexer_t *lexer)
{
    char quote = LEXER_LA(lexer, 0);
    LEXER_EAT(lexer, 1);
    unsigned int start = lexer->index;

    bool closed = false;
    while (!LEXER_ISEOF(lexer)) {
        char c = LEXER_LA(lexer, 0);
        LEXER_EAT(lexer, 1);
        if (c == quote) {
            closed = true;
            break;
        }
        if (cfgm_lexer_is_eol(c)) {
            break;
        }
    }

    /*
     * Reaching this point without a closing quote means the literal was cut
     * short by a line terminator or by the end of the input.
     */
    if (!closed) {
        LEXER_ERROR(lexer, CFGM_EUNFINISHED_STRING);
        return NULL;
    }

    cfgm_string_ref_t value = {
        .buf = LEXER_P(lexer, start),
        /* The cursor sits just past the closing quote; exclude it. */
        .size = LEXER_S(lexer, start) - 1
    };
    return cfgm_token_factory_new_token(
        lexer->factory, TK_STRING, &value, &lexer->source);
}

/*
 * Scan the digits of a hexadecimal literal.
 *
 * The cursor must point at the two-character "0x"/"0X" prefix, which is
 * skipped without being re-checked. Between 1 and 8 hex digits must follow.
 *
 * Returns the cursor offset just past the last digit on success. On failure
 * (no digit after the prefix, or more than 8 digits) CFGM_EMALFORMED_NUMBER
 * is recorded in the lexer and 0 is returned.
 */
unsigned int cfgm_lexer_scan_hex_literal(cfgm_cml_lexer_t *lexer)
{
    enum { MAX_HEX_DIGITS = 8 };

    LEXER_EAT(lexer, 2);
    unsigned int start = lexer->index;

    /* Stop at the end of the input instead of reading past the buffer. */
    while (!LEXER_ISEOF(lexer) && cfgm_is_hex_digit(LEXER_LA(lexer, 0))) {
        LEXER_EAT(lexer, 1);
    }

    unsigned int digits = LEXER_S(lexer, start);
    if (digits == 0 || digits > MAX_HEX_DIGITS) {
        LEXER_ERROR(lexer, CFGM_EMALFORMED_NUMBER);
        return 0;
    }

    return lexer->index;
}

/* Character at the cursor, or '\0' once the cursor has reached the end of input. */
static char cfgm_lexer_dec_peek(const cfgm_cml_lexer_t *lexer)
{
    return LEXER_ISEOF(lexer) ? '\0' : LEXER_LA(lexer, 0);
}

/* Consume a (possibly empty) run of decimal digits at the cursor. */
static void cfgm_lexer_dec_skip_digits(cfgm_cml_lexer_t *lexer)
{
    /* <ctype.h> functions require a value representable as unsigned char. */
    while (isdigit((unsigned char)cfgm_lexer_dec_peek(lexer))) {
        LEXER_EAT(lexer, 1);
    }
}

/*
 * Scan a decimal literal: optional integer digits, an optional '.' followed
 * by optional fraction digits, and an optional exponent ('e' or 'E', optional
 * sign, at least one digit).
 *
 * Returns the cursor offset just past the literal. If an exponent marker is
 * not followed by at least one digit, CFGM_EMALFORMED_NUMBER is recorded in
 * the lexer and 0 is returned.
 */
unsigned int cfgm_lexer_scan_dec_literal(cfgm_cml_lexer_t *lexer)
{
    cfgm_lexer_dec_skip_digits(lexer);

    if (cfgm_lexer_dec_peek(lexer) == '.') {
        LEXER_EAT(lexer, 1);
        cfgm_lexer_dec_skip_digits(lexer);
    }

    char c = cfgm_lexer_dec_peek(lexer);
    if (c == 'e' || c == 'E') {
        LEXER_EAT(lexer, 1);
        c = cfgm_lexer_dec_peek(lexer);
        if (c == '+' || c == '-') {
            LEXER_EAT(lexer, 1);
        }
        /* An exponent needs at least one digit after the optional sign. */
        if (!isdigit((unsigned char)cfgm_lexer_dec_peek(lexer))) {
            LEXER_ERROR(lexer, CFGM_EMALFORMED_NUMBER);
            return 0;
        }
        cfgm_lexer_dec_skip_digits(lexer);
    }
    return lexer->index;
}

/*
 * Scan a numeric literal at the cursor and return it as a TK_NUMBER token.
 *
 * A literal that starts with "0x" or "0X" is scanned as hexadecimal, anything
 * else as decimal. The token's value references the literal's text inside the
 * input buffer.
 *
 * Returns NULL with CFGM_EMALFORMED_NUMBER recorded in the lexer when the
 * selected scanner fails or consumes nothing.
 */
cfgm_token_t *cfgm_lexer_scan_numeric_literal(cfgm_cml_lexer_t *lexer)
{
    unsigned int start = lexer->index;

    /* The prefix needs two characters; only look ahead when both exist. */
    bool is_hex = false;
    if (!LEXER_ISEOF(lexer) && lexer->size - lexer->index >= 2) {
        char n = LEXER_LA(lexer, 1);
        is_hex = LEXER_LA(lexer, 0) == '0' && (n == 'x' || n == 'X');
    }

    unsigned int end = is_hex ?
        cfgm_lexer_scan_hex_literal(lexer) :
        cfgm_lexer_scan_dec_literal(lexer);
    /* Both scanners return 0 on failure, which is never past `start`. */
    if (end <= start) {
        LEXER_ERROR(lexer, CFGM_EMALFORMED_NUMBER);
        return NULL;
    }

    cfgm_string_ref_t value = {
        .buf = LEXER_P(lexer, start),
        .size = end - start
    };
    return cfgm_token_factory_new_token(
        lexer->factory, TK_NUMBER, &value, &lexer->source);
}

/*
 * Produce the next token from the input.
 *
 * Leading whitespace, line terminators and '#' comments are skipped first;
 * this repeats so that consecutive comment lines and indentation following a
 * comment are skipped as well. Then one of the following is scanned:
 *   - an identifier (TK_ID),
 *   - a single punctuation character  = , ; { [ ] }  (TK_PUNCT),
 *   - a double-quoted string (TK_STRING),
 *   - a number starting with a digit, or with '.' followed by a digit
 *     (TK_NUMBER).
 *
 * Returns the new token, or NULL when the end of the input is reached, when a
 * scanner fails (the error is recorded in the lexer), or when the character
 * at the cursor does not start any known token. Note that in the last case no
 * error code is recorded, so callers cannot tell it apart from end of input
 * by the error code alone.
 */
cfgm_token_t *cfgm_cml_lexer_lex(cfgm_cml_lexer_t *lexer)
{
    for (;;) {
        cfgm_lexer_skip_space(lexer);
        if (LEXER_ISEOF(lexer)) {
            return NULL;
        }
        if (LEXER_LA(lexer, 0) != '#') {
            break;
        }
        unsigned int before = lexer->index;
        cfgm_lexer_skip_comment(lexer);
        if (lexer->index == before) {
            /* No progress was made; bail out rather than spin forever. */
            break;
        }
    }

    if (cfgm_lexer_is_word_start(LEXER_LA(lexer, 0))) {
        return cfgm_lexer_scan_ident(lexer);
    }

    cfgm_string_ref_t value = {0};
    switch (LEXER_LA(lexer, 0)) {
        case '=':
        case ',':
        case ';':
        case '{':
        case '[':
        case ']':
        case '}':
            value.buf = LEXER_P(lexer, lexer->index);
            value.size = 1;
            LEXER_EAT(lexer, 1);
            return cfgm_token_factory_new_token(
                lexer->factory, TK_PUNCT, &value, &lexer->source);
        case '"':
            return cfgm_lexer_scan_string_literal(lexer);
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            return cfgm_lexer_scan_numeric_literal(lexer);
        case '.':
            /* A leading '.' is only valid as the start of a fraction. */
            if (lexer->size - lexer->index > 1 &&
                isdigit((unsigned char)LEXER_LA(lexer, 1))) {
                return cfgm_lexer_scan_numeric_literal(lexer);
            }
            LEXER_ERROR(lexer, CFGM_EMALFORMED_NUMBER);
            return NULL;
        default:
            break;
    }
    return NULL;
}

/*
 * Allocate a lexer over `input`.
 *
 * input   - text to tokenize; the pointer is stored, not copied, so the
 *           buffer must stay valid for as long as the lexer is used.
 * size    - number of characters of `input` to tokenize.
 * factory - token factory used to build every token; must not be NULL.
 *
 * Returns the new lexer with the cursor at offset 0, line 0 and no error
 * recorded, or NULL when `factory` is NULL or the allocation fails. Release
 * the lexer with cfgm_cml_lexer_destroy().
 */
cfgm_cml_lexer_t *cfgm_cml_lexer_create(const char *input, unsigned int size,
    cfgm_token_factory_t *factory)
{
    if (factory == NULL) {
        return NULL;
    }

    /*
     * calloc zero-fills the whole object, so members that are not assigned
     * below (notably error.source) never hold indeterminate values.
     */
    cfgm_cml_lexer_t *lexer = calloc(1, sizeof *lexer);
    if (lexer == NULL) {
        return NULL;
    }

    lexer->input = input;
    lexer->size = size;
    lexer->index = 0;
    lexer->factory = factory;
    lexer->error.code = CFGM_OK;
    lexer->source.line = 0;
    lexer->source.column = 0;
    lexer->source.character = 0;
    return lexer;
}

/*
 * Release a lexer obtained from cfgm_cml_lexer_create(). Only the lexer
 * object itself is freed; the input buffer and the token factory are left
 * untouched. Passing NULL is allowed and does nothing.
 */
void cfgm_cml_lexer_destroy(cfgm_cml_lexer_t *lexer)
{
    /* free(NULL) is defined to be a no-op, so no explicit guard is needed. */
    free(lexer);
}

#ifdef __cplusplus
}
#endif
